# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

#' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
#' @importFrom R6 R6Class
#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap imap_chr
#' @importFrom assertthat assert_that is.string
#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos
#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind set_names exec
#' @importFrom rlang is_bare_character quo_get_expr quo_get_env quo_set_expr .data seq2 is_interactive
#' @importFrom rlang expr caller_env is_character quo_name is_quosure enexpr enexprs as_quosure
#' @importFrom rlang is_list
#' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
#' @useDynLib arrow, .registration = TRUE
#' @keywords internal
"_PACKAGE"

#' @importFrom vctrs s3_register vec_size vec_cast vec_unique
.onLoad <- function(...) {
  dplyr_methods <- paste0(
    "dplyr::",
    c(
      "select", "filter", "collect", "summarise", "group_by", "groups",
      "group_vars", "group_by_drop_default", "ungroup", "mutate", "transmute",
      "arrange", "rename", "pull", "relocate", "compute", "collapse",
      "distinct", "left_join", "right_join", "inner_join", "full_join",
      "semi_join", "anti_join", "count", "tally"
    )
  )
  for (cl in c("Dataset", "ArrowTabular", "arrow_dplyr_query")) {
    for (m in dplyr_methods) {
      s3_register(m, cl)
    }
  }
  s3_register("dplyr::tbl_vars", "arrow_dplyr_query")

  for (cl in c(
    "Array", "RecordBatch", "ChunkedArray", "Table", "Schema",
    "Field", "DataType", "RecordBatchReader"
  )) {
    s3_register("reticulate::py_to_r", paste0("pyarrow.lib.", cl))
    s3_register("reticulate::r_to_py", cl)
  }

  # Create these once, at package build time
  if (arrow_available()) {
    # Also include all available Arrow Compute functions,
    # namespaced as arrow_fun.
    # We can't do this at install time because list_compute_functions() may error
    all_arrow_funs <- list_compute_functions()
    arrow_funcs <- set_names(
      lapply(all_arrow_funs, function(fun) {
        force(fun)
        function(...) build_expr(fun, ...)
      }),
      paste0("arrow_", all_arrow_funs)
    )
    .cache$functions <- c(nse_funcs, arrow_funcs)
  }

  if (tolower(Sys.info()[["sysname"]]) == "windows") {
    # Disable multithreading on Windows
    # See https://issues.apache.org/jira/browse/ARROW-8379
    options(arrow.use_threads = FALSE)
  }

  invisible()
}

.onAttach <- function(libname, pkgname) {
  if (!arrow_available()) {
    msg <- paste(
      "The Arrow C++ library is not available. To retry installation with debug output, run:",
      "    install_arrow(verbose = TRUE)",
      "See https://arrow.apache.org/docs/r/articles/install.html for more guidance and troubleshooting.",
      sep = "\n"
    )
    packageStartupMessage(msg)
  } else {
    # Just to be extra safe, let's wrap this in a try();
    # we don't a failed startup message to prevent the package from loading
    try({
      features <- arrow_info()$capabilities
      # That has all of the #ifdef features, plus the compression libs and the
      # string libraries (but not the memory allocators, they're added elsewhere)
      #
      # Let's print a message if some are off
      if (some_features_are_off(features)) {
        packageStartupMessage("See arrow_info() for available features")
      }
    })
  }
}

#' Is the C++ Arrow library available?
#'
#' You won't generally need to call these function, but they're made available
#' for diagnostic purposes.
#' @return `TRUE` or `FALSE` depending on whether the package was installed
#' with:
#' * The Arrow C++ library (check with `arrow_available()`)
#' * Arrow Dataset support enabled (check with `arrow_with_dataset()`)
#' * Parquet support enabled (check with `arrow_with_parquet()`)
#' * JSON support enabled (check with `arrow_with_json()`)
#' * Amazon S3 support enabled (check with `arrow_with_s3()`)
#' @export
#' @examples
#' arrow_available()
#' arrow_with_dataset()
#' arrow_with_parquet()
#' arrow_with_json()
#' arrow_with_s3()
#' @seealso If any of these are `FALSE`, see
#' `vignette("install", package = "arrow")` for guidance on reinstalling the
#' package.
arrow_available <- function() {
  tryCatch(.Call(`_arrow_available`), error = function(e) {
    return(FALSE)
  })
}

#' @rdname arrow_available
#' @export
arrow_with_dataset <- function() {
  is_32bit <- .Machine$sizeof.pointer < 8
  is_old_r <- getRversion() < "4.0.0"
  is_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
  if (is_32bit && is_old_r && is_windows) {
    # 32-bit rtools 3.5 does not properly implement the std::thread expectations
    # but we can't just disable ARROW_DATASET in that build,
    # so report it as "off" here.
    return(FALSE)
  }
  tryCatch(.Call(`_dataset_available`), error = function(e) {
    return(FALSE)
  })
}

#' @rdname arrow_available
#' @export
arrow_with_parquet <- function() {
  tryCatch(.Call(`_parquet_available`), error = function(e) {
    return(FALSE)
  })
}

#' @rdname arrow_available
#' @export
arrow_with_s3 <- function() {
  tryCatch(.Call(`_s3_available`), error = function(e) {
    return(FALSE)
  })
}

#' @rdname arrow_available
#' @export
arrow_with_json <- function() {
  tryCatch(.Call(`_json_available`), error = function(e) {
    return(FALSE)
  })
}

option_use_threads <- function() {
  !is_false(getOption("arrow.use_threads"))
}

#' Report information on the package's capabilities
#'
#' This function summarizes a number of build-time configurations and run-time
#' settings for the Arrow package. It may be useful for diagnostics.
#' @return A list including version information, boolean "capabilities", and
#' statistics from Arrow's memory allocator, and also Arrow's run-time
#' information.
#' @export
#' @importFrom utils packageVersion
arrow_info <- function() {
  opts <- options()
  out <- list(
    version = packageVersion("arrow"),
    libarrow = arrow_available(),
    options = opts[grep("^arrow\\.", names(opts))]
  )
  if (out$libarrow) {
    pool <- default_memory_pool()
    runtimeinfo <- runtime_info()
    buildinfo <- build_info()
    compute_funcs <- list_compute_functions()
    out <- c(out, list(
      capabilities = c(
        dataset = arrow_with_dataset(),
        parquet = arrow_with_parquet(),
        json = arrow_with_json(),
        s3 = arrow_with_s3(),
        utf8proc = "utf8_upper" %in% compute_funcs,
        re2 = "replace_substring_regex" %in% compute_funcs,
        vapply(tolower(names(CompressionType)[-1]), codec_is_available, logical(1))
      ),
      memory_pool = list(
        backend_name = pool$backend_name,
        bytes_allocated = pool$bytes_allocated,
        max_memory = pool$max_memory,
        available_backends = supported_memory_backends()
      ),
      runtime_info = list(
        simd_level = runtimeinfo[1],
        detected_simd_level = runtimeinfo[2]
      ),
      build_info = list(
        cpp_version = buildinfo[1],
        cpp_compiler = buildinfo[2],
        cpp_compiler_version = buildinfo[3],
        cpp_compiler_flags = buildinfo[4],
        # git_id is "" if not built from a git checkout
        # convert that to NULL
        git_id = if (nzchar(buildinfo[5])) buildinfo[5]
      )
    ))
  }
  structure(out, class = "arrow_info")
}

some_features_are_off <- function(features) {
  # `features` is a named logical vector (as in arrow_info()$capabilities)
  # Let's exclude some less relevant ones
  blocklist <- c("lzo", "bz2", "brotli")
  # Return TRUE if any of the other features are FALSE
  !all(features[setdiff(names(features), blocklist)])
}

#' @export
print.arrow_info <- function(x, ...) {
  print_key_values <- function(title, vals, ...) {
    # Make a key-value table for printing, no column names
    df <- data.frame(vals, stringsAsFactors = FALSE, ...)
    names(df) <- ""

    cat(title, ":\n", sep = "")
    print(df)
    cat("\n")
  }
  cat("Arrow package version: ", format(x$version), "\n\n", sep = "")
  if (x$libarrow) {
    print_key_values("Capabilities", c(
      x$capabilities,
      jemalloc = "jemalloc" %in% x$memory_pool$available_backends,
      mimalloc = "mimalloc" %in% x$memory_pool$available_backends
    ))
    if (some_features_are_off(x$capabilities) && identical(tolower(Sys.info()[["sysname"]]), "linux")) {
      # Only on linux because (e.g.) we disable certain features on purpose on rtools35 and solaris
      cat(
        "To reinstall with more optional capabilities enabled, see\n",
        "  https://arrow.apache.org/docs/r/articles/install.html\n\n"
      )
    }

    if (length(x$options)) {
      print_key_values("Arrow options()", map_chr(x$options, format))
    }

    format_bytes <- function(b, units = "auto", digits = 2L, ...) {
      format(structure(b, class = "object_size"), units = units, digits = digits, ...)
    }
    print_key_values("Memory", c(
      Allocator = x$memory_pool$backend_name,
      # utils:::format.object_size is not properly vectorized
      Current = format_bytes(x$memory_pool$bytes_allocated, ...),
      Max = format_bytes(x$memory_pool$max_memory, ...)
    ))
    print_key_values("Runtime", c(
      `SIMD Level` = x$runtime_info$simd_level,
      `Detected SIMD Level` = x$runtime_info$detected_simd_level
    ))
    print_key_values("Build", c(
      `C++ Library Version` = x$build_info$cpp_version,
      `C++ Compiler` = x$build_info$cpp_compiler,
      `C++ Compiler Version` = x$build_info$cpp_compiler_version,
      `Git ID` = x$build_info$git_id
    ))
  } else {
    cat(
      "Arrow C++ library not available. See https://arrow.apache.org/docs/r/articles/install.html ",
      "for troubleshooting.\n"
    )
  }
  invisible(x)
}

option_compress_metadata <- function() {
  !is_false(getOption("arrow.compress_metadata"))
}

#' @include enums.R
ArrowObject <- R6Class("ArrowObject",
  public = list(
    initialize = function(xp) self$set_pointer(xp),
    pointer = function() get(".:xp:.", envir = self),
    `.:xp:.` = NULL,
    set_pointer = function(xp) {
      if (!inherits(xp, "externalptr")) {
        stop(
          class(self)[1], "$new() requires a pointer as input: ",
          "did you mean $create() instead?",
          call. = FALSE
        )
      }
      assign(".:xp:.", xp, envir = self)
    },
    print = function(...) {
      if (!is.null(self$.class_title)) {
        # Allow subclasses to override just printing the class name first
        class_title <- self$.class_title()
      } else {
        class_title <- class(self)[[1]]
      }
      cat(class_title, "\n", sep = "")
      if (!is.null(self$ToString)) {
        cat(self$ToString(), "\n", sep = "")
      }
      invisible(self)
    },
    invalidate = function() {
      assign(".:xp:.", NULL, envir = self)
    }
  )
)

#' @export
`!=.ArrowObject` <- function(lhs, rhs) !(lhs == rhs) # nolint

#' @export
`==.ArrowObject` <- function(x, y) { # nolint
  x$Equals(y)
}

#' @export
all.equal.ArrowObject <- function(target, current, ..., check.attributes = TRUE) {
  target$Equals(current, check_metadata = check.attributes)
}
